/*
    file:   overlay.c
    desc:   OVERLAY utility implementation.

    author: Jaromir Dvorak (md@unicode.cz)

    This file is part of the AVGA platform.
    http://avga.prometheus4.com/
    

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

 */


#include <stdlib.h>
#include <avr/pgmspace.h>
#include "../core/driver.h"
#include "window.h"
#include "overlay.h"
#include "misc.h"

/* Index of the next RAM block slot the overlay code will use; overlay_clear() counts it back down to 0. */
unsigned char overlay_block_id = 0;
#ifdef OVERLAY_REMEMBER_BLOCKS
/* s_blk[n]:  value that was stored in the reftable cell before slot n was mapped onto it. */
unsigned char s_blk[OVERLAY_BLOCK_COUNT];
/* s_addr[n]: address of the reftable cell that slot n was mapped onto. */
unsigned char *s_addr[OVERLAY_BLOCK_COUNT];
#endif


/*
 *
 * overlay_clear()
 *
 * Removes all drawn overlay objects: walks the used block slots from the
 * most recently used one down to slot 0 and writes the remembered value
 * (s_blk) back into the remembered reftable cell (s_addr).
 * On return overlay_block_id is 0.
 *
 * NOTE(review): only slots below the current overlay_block_id are visited.
 *               With OVERLAY_INVERSE_PRIORITY the allocator wraps
 *               overlay_block_id back to 0, so slots above it are not
 *               touched here - confirm that this is intended.
 *
 */

#ifdef OVERLAY_CLEARABLE
void overlay_clear()
{
	while(overlay_block_id)
	{
		overlay_block_id--;

		unsigned char saved = s_blk[overlay_block_id];

		//restore the cell only when the remembered value is not itself a RAM mapping
		if(!driver_is_rammap(saved))
		{
			*s_addr[overlay_block_id] = saved;
		}

#ifdef OVERLAY_INVERSE_PRIORITY
		//mark the slot as unused so the allocator will not restore it again
		s_addr[overlay_block_id] = 0;
#endif
	}
}
#endif



/*
 *
 * _overlay_realloc_block(scraddr)
 *
 * Returns a pointer to writable (RAM) pixel data for the reftable cell
 * at scraddr.
 *
 *  - If the cell already refers to a RAM block, that block is returned.
 *  - Otherwise the next overlay slot (overlay_block_id) is taken, filled
 *    either with OVERLAY_BGCOLOR or with a copy of the cell's tile from
 *    the tileset in program memory, and the cell is remapped onto it.
 *
 * When all OVERLAY_BLOCK_COUNT slots are in use:
 *  - without OVERLAY_INVERSE_PRIORITY: returns 0 and changes nothing;
 *  - with OVERLAY_INVERSE_PRIORITY: wraps around to slot 0 and recycles
 *    slots, restoring the cell each recycled slot was mapped onto.
 *
 */
static inline unsigned char* _overlay_realloc_block(unsigned char* scraddr)
{
	unsigned char tile = *scraddr;

	//already RAM-mapped: nothing to allocate
	if(driver_is_rammap(tile))
	{
		tile -= DRIVER_PGM_BLOCK_COUNT;
		return driver_get_ramblock_ptr(tile);
	}

#ifdef OVERLAY_INVERSE_PRIORITY
	//out of slots: start recycling from slot 0
	if(overlay_block_id >= OVERLAY_BLOCK_COUNT)
	{
		overlay_block_id = 0;
	}

	//Intentionally unconditional: whichever slot is about to be used,
	//give its previous cell (if any) its remembered value back first.
	{
		unsigned char *prev_cell = s_addr[overlay_block_id];
		if(prev_cell)
		{
			*prev_cell = s_blk[overlay_block_id];
		}
	}
#else
	//out of slots: report failure to the caller
	if(overlay_block_id >= OVERLAY_BLOCK_COUNT)
	{
		return 0;
	}
#endif

#ifdef OVERLAY_REMEMBER_BLOCKS
	//remember what to restore, and where
	s_addr[overlay_block_id] = scraddr;
	s_blk[overlay_block_id] = tile;
#endif

	unsigned char* block = driver_get_ramblock_ptr(overlay_block_id);

#ifdef OVERLAY_WINDOW
	unsigned char* tileset = (unsigned char*)window_get_tileset();
#else
	unsigned char* tileset = (unsigned char*)driver_get_tileset();
#endif

#ifdef OVERLAY_BGCOLOR
	//start from a solid background: same colour in both nibbles of every byte
	memset(block, (OVERLAY_BGCOLOR&0x0F) | ((OVERLAY_BGCOLOR&0x0F)<<4), DRIVER_BLOCK_SIZE);
#else
	//start from a copy of the tile that is currently displayed in this cell
	memcpy_P(block, &tileset[tile * DRIVER_BLOCK_SIZE], DRIVER_BLOCK_SIZE);
#endif

	//remap the cell onto the new block and advance to the next slot
	*scraddr = driver_rammap_block(overlay_block_id++);

	return block;
}


/*
 *
 * overlay_draw(...)   (compiled as overlay_draw_param(...) when OVERLAY_DRAW_PARAM is defined)
 *
 * Draws an image stored in program memory into the overlay. The image is read
 * one byte per two pixels (4 bits per pixel, first pixel in the high nibble).
 * Every reftable cell touched by the image is switched to a RAM block first
 * (same logic as _overlay_realloc_block, inlined by hand).
 *
 * Parameters:
 *   x, y          - destination position; signed int with OVERLAY_DRAW_SAFE
 *                   (negative values are clipped), unsigned char otherwise.
 *   img           - image data in program memory; a source line is width/2 bytes.
 *   width, height - image size in pixels.
 *   param         - only with OVERLAY_DRAW_PARAM: tested against OVERLAY_HFLIP /
 *                   OVERLAY_VFLIP (OVERLAY_DRAW_TRANSFORMATIONS) and, with
 *                   OVERLAY_DRAW_COLORMODIFY, its low nibble is XORed into every
 *                   drawn pixel.
 *
 * Returns:
 *   0 - image drawn.
 *   1 - ran out of overlay blocks (only without OVERLAY_INVERSE_PRIORITY);
 *       drawing stops at that point, pixels already written stay written.
 *   2 - OVERLAY_DRAW_SAFE only: nothing visible horizontally.
 *   3 - OVERLAY_DRAW_SAFE only: nothing visible vertically.
 *
 * Note: This function is pretty complex and should be as fast as possible; it is well optimized for speed.
 *       However, it could be rewritten completely in asm to save some more time.
 *       Special versions could be added to handle some special cases faster (no transparency, etc).
 *
 *
 * Note: This is the extended, configurable version, so it may seem difficult to understand.
 *       All configurations are designed to allow the optimizer to always do the best job; use -O2.
 *       Every saved instruction is good in such a time-critical system.
 *
 * NOTE(review): without OVERLAY_DRAW_SAFE a height of 0 is not rejected; the
 *               do/while at the bottom pre-decrements height, so it would wrap
 *               and keep drawing - confirm callers never pass 0.
 *
 */


#ifdef OVERLAY_DRAW
#ifdef OVERLAY_DRAW_PARAM
unsigned char overlay_draw_param
#else
unsigned char overlay_draw
#endif

(
#ifdef OVERLAY_DRAW_SAFE
	signed int x, signed int y, 
#else
	unsigned char x, unsigned char y,
#endif
	PGM_P img, 
	unsigned char width, unsigned char height
#ifdef OVERLAY_DRAW_PARAM
	, unsigned char param
#endif
)


{
	//wndx/wndy end up as the right/bottom clipping limits in the OVERLAY_DRAW_SAFE section.
	register unsigned char wndx = 0, wndy = DRIVER_MAXY; //removed by optimizer if not needed....
	register unsigned char *scraddr = driver_get_reftable();

	
	//At first, do all the selected calculations, to make the loop as fast as possible.


#ifdef OVERLAY_WINDOW				//align to window?
	//use the window's reftable and shift the coordinates by the window scroll / start line
	scraddr = window_get_reftable();
	wndx = window_get_scrollX();
	x+=wndx;
	wndy = window_get_startline();
	y+=wndy;
	wndy += window_get_num_lines();
#endif

	//lp = step (in bytes) from one source line to the next; pp = direction of travel inside a line.
#ifdef OVERLAY_DRAW_TRANSFORMATIONS		
	register signed char lp = width >> 1;
	register signed char pp = 1;
	//horizontal flip: start at the last byte of the line and read backwards
	if(param & OVERLAY_HFLIP) { img += lp-1;  pp=-pp; }
	//vertical flip: start at the last line and step backwards
	if(param & OVERLAY_VFLIP) { img += (height-1) * lp; lp=-lp; }
#else	//!OVERLAY_DRAW_TRANSFORMATIONS	
	//unsigned here, so every "pp<0" test below is constant false and can be dropped by the compiler
	register unsigned char lp = width >> 1;
	register unsigned char pp = 1;
#endif

#ifdef OVERLAY_DRAW_SAFE				//do clipping, or return if the image lies outside drawing area
	//The AVR T flag (SREG bit 6) is used as a one-bit "first visible pixel is the second
	//pixel of its byte" marker; it is cleared here and tested at the start of every line.
	//NOTE(review): this assumes nothing else changes T between here and the test below - confirm.
	asm volatile("clt");
	wndx += DRIVER_RESX;
	wndx -= OVERLAY_DRAW_RIGHTMARGIN + OVERLAY_DRAW_LEFTMARGIN;
	register signed int tmp = x + width;
	if(tmp>wndx) tmp = wndx;			//clip at the right limit
	if(x >= 0)   tmp -= x; 
    else
	{
		//image starts left of the drawing area: skip the hidden source pixels
		x = abs(x);
		if(x & 1)
		{
			//odd number of hidden pixels: the per-line preload below consumes one
			//extra pixel, so the visible width is extended by one to compensate
			asm volatile("set");
			tmp++;
		}

		x>>=1;							//hidden pixels -> whole hidden bytes
		img += x * pp;
		x = 0;
	}
	if(tmp <= 0) return 2; 				//x coord out of drawing area
	width = (unsigned char)tmp;
	x += OVERLAY_DRAW_LEFTMARGIN;

	wndy -= OVERLAY_DRAW_TOPMARGIN + OVERLAY_DRAW_BOTTOMMARGIN;
	tmp = y + height;
	if(tmp > wndy) tmp = wndy;			//clip at the bottom limit
	if(y >= 0) tmp -= y;
	else
	{
		//image starts above the drawing area: skip the hidden source lines
		img += abs(y) * lp;
		y=0;
	}
	if(tmp <= 0) return 3;				//y coord out of drawing area.
	height = (unsigned char)tmp;
	y += OVERLAY_DRAW_TOPMARGIN;
#endif


#ifndef OVERLAY_DRAW_IMGADDR 			//are we addressing the image?
	//not addressing: one read pointer runs through the image as a continuous stream
	register PGM_P imgptr = img;
#endif

#ifdef OVERLAY_DRAW_COLORMODIFY			//XORing image pixels?
	//keep only the colour nibble; the flip bits were already consumed above
	param &= 0x0F;
#endif





	//prex/prey: presumably the pixel offsets of (x,y) inside their 8-pixel block - TODO confirm macro semantics
	register unsigned char prex = DRIVER_X2PRE(x); //starting value of x2 (pixel column inside a block)
	register unsigned char prey = DRIVER_Y2PRE(y); //starting row inside a block
	scraddr += DRIVER_X2COL(x);
	scraddr += DRIVER_Y2ROW(y) * DRIVER_COLUMNS;
	prey*=4;							//row -> byte offset: a block row is 4 bytes (8 pixels)

	// ready for drawing!


	do //source line loop
	{
		register unsigned char x1 = 0;    //x1 for img (PGM).
		register unsigned char inpix;     //current source byte (two pixels)

#ifdef OVERLAY_DRAW_IMGADDR
		register PGM_P imgptr = img; //addressing or continuous bitstream?
#endif


#ifdef OVERLAY_DRAW_SAFE

		//T flag set by the clipping code: preload one byte and start at its second pixel
		if(SREG & 0x40)
		{
			x1++;
			if(pp<0) inpix = pgm_read_byte(imgptr--);
			else	
			{
			  inpix = pgm_read_byte(imgptr++);	 
			  asm volatile ("swap %0" : "=r" (inpix) : "0" (inpix));
			}
		}

//Disabled hand-written asm variant of the test above, kept for reference.
/*asm volatile("		brtc b2 		\n\t"
			  "		inc %0			\n\t"

			  "		sbrs %3,7 		\n\t"
			  "		rjmp b1 		\n\t"
			  "		lpm %1, %a2 	\n\t"
			  "		sbiw r30,1  	\n\t"
			  "		rjmp b2 		\n\t"

			  "b1:  lpm %1, %a2+ 	\n\t"
			  "		swap %1			\n\t" 
			  "b2:					\n\t"
  
 			  : "=r" (x1), "=r" (inpix)
			  : "e" (imgptr), "r" (pp), "0" (x1), "1" (inpix));*/

#endif

		register unsigned char x2 = prex;          //pixel column inside the current block
		register unsigned char* scrptr = scraddr;  //block pointer

		while(1) //block column loop; left through "goto newline" or "return 1"
		{
			unsigned char* sp;// = _overlay_realloc_block( scrptr );
			//if(!sp) return 1;

			//hand-inlined _overlay_realloc_block; saves few instructions when returning.
			register unsigned char i = *scrptr; 
			if(driver_is_rammap(i))
			{
				//cell is already RAM-mapped: draw into its existing block
				i-=DRIVER_PGM_BLOCK_COUNT;
				sp = driver_get_ramblock_ptr(i);
			}
			else
			{
				//NOTE: the "if" below guards only the single statement that follows it.
				//With OVERLAY_INVERSE_PRIORITY that is the wrap to slot 0; the restore of
				//the slot's previous cell then runs on every allocation.
				if(overlay_block_id >= OVERLAY_BLOCK_COUNT)

#ifdef OVERLAY_INVERSE_PRIORITY
				overlay_block_id = 0;
				unsigned char *lastptr = s_addr[overlay_block_id];
				if(lastptr) *lastptr = s_blk[overlay_block_id];
#else
				return 1;				//out of overlay blocks
#endif
#ifdef OVERLAY_REMEMBER_BLOCKS
				//remember the replaced value and its cell
				s_blk[overlay_block_id] = i;
				s_addr[overlay_block_id] = scrptr;
#endif
				sp = driver_get_ramblock_ptr(overlay_block_id);


#ifdef OVERLAY_WINDOW
				unsigned char* tileset = (unsigned char*)window_get_tileset();
#else
				unsigned char* tileset = (unsigned char*)driver_get_tileset();
#endif
				//initialise the new block: solid background colour, or a copy of the tile it replaces
#ifdef OVERLAY_BGCOLOR
				memset(sp, (OVERLAY_BGCOLOR&0x0F) | ((OVERLAY_BGCOLOR&0x0F)<<4), DRIVER_BLOCK_SIZE);
#else
				memcpy_P(sp, &tileset[i * DRIVER_BLOCK_SIZE], DRIVER_BLOCK_SIZE);
#endif
				*scrptr = driver_rammap_block(overlay_block_id++);
			}


			sp += prey; //line in block
			sp += x2 >> 1; //column in block

			register unsigned char pix;
			register unsigned char outpix = *sp;   //destination byte being assembled (two pixels)

			//todo: imgptr not Z in listing !!!

			for(;x2<8;x2++) //inner loop. should be as fast as possible.
			{
				if(!(x1&1)) 
				{
					//even source pixel: fetch the next source byte.
					//this test should only take one instruction.
					if(pp<0) { inpix = pgm_read_byte(imgptr--); goto newpixel; }  //removed as deadcode when not using transformations,
					inpix = pgm_read_byte(imgptr++); //lpm Z+
				    //asm volatile("lpm %A0, %a1+" : "=r" (inpix) : "e" (imgptr) );
				}

				//bring the other nibble of the source byte into the low nibble
				asm volatile ("swap %0" : "=r" (inpix) : "0" (inpix));

newpixel:		pix = inpix & 0x0F;  //source pixel.


#ifdef OVERLAY_DRAW_ALPHA
				if(pix != OVERLAY_DRAW_ALPHA)	//transparent?
#endif
				{

#ifdef OVERLAY_DRAW_COLORMODIFY
					pix ^= param;
#endif
					//odd destination column -> low nibble, even column -> high nibble
					if(x2 & 1) outpix &= 0xf0; 
					else { outpix &= 0x0f;  asm volatile ("swap %0" : "=r" (pix) : "0" (pix)); }

					outpix |= pix;
				}

				if(++x1 >= width) {  *sp = outpix;  goto newline; } //last pixel in source line
				if(x2 & 1) {  *sp++ = outpix; outpix = *sp; }       //destination byte complete: store it, load the next
			}

			x2=0;
			scrptr++; //next block column
			//asm volatile("clt");
		}

newline:img += lp; //removed by optimizer when not addressing.

		if((prey+=4) >= (DRIVER_BLOCK_HEIGHT*4))
		{
			prey=0; 
			scraddr+=DRIVER_COLUMNS; //next block line
			//if(!--row) return 0; 
		}
	}
	while(--height);

	return 0;
}

#endif //OVERLAY_DRAW


/*
 *
 * Overlay primitives.
 *
 * Note: the API - _overlay_write_pixel_to_block and _overlay_realloc_block
 *       can be used to implement all basic primitives.
 *
 */


/*
 *
 * _overlay_write_pixel_to_block(ptr, x, y, color)
 *
 * Stores one 4-bit pixel into a block's pixel data.
 *
 *   ptr   - first byte of the block's pixel data.
 *   x     - pixel column inside the block. Two pixels share a byte:
 *           even columns use the high nibble, odd columns the low nibble.
 *   y     - pixel row inside the block; rows are 4 bytes apart.
 *   color - pixel value; only the low nibble is used.
 *
 * The other pixel sharing the byte is left untouched.
 * x and y are not range-checked; callers pass values already reduced
 * to the block (see overlay_putpixel).
 *
 */
static inline void _overlay_write_pixel_to_block(unsigned char* ptr, unsigned char x, unsigned char y,  unsigned char color)
{
	unsigned char *cell = ptr + y * 4 + (x >> 1);
	unsigned char nibble = color & 0x0f;
	unsigned char keep = 0xf0;			//odd column: keep the high nibble, replace the low one

	if(!(x & 1))
	{
		//even column: move the colour to the high nibble and keep the low one.
		//Written as a rotate by 4 (a nibble swap) in plain C instead of inline asm,
		//so the helper no longer depends on the target's instruction set while the
		//compiler can still pick a single swap instruction where one exists.
		nibble = (unsigned char)((nibble << 4) | (nibble >> 4));
		keep = 0x0f;
	}

	*cell = (unsigned char)((*cell & keep) | nibble);
}


#ifdef OVERLAY_PRIMITIVES

/*
 *
 * overlay_putpixel(x, y, color)
 *
 * Classic putpixel; behaves just like in full graphics mode.
 * With OVERLAY_WINDOW the coordinates are first shifted by the window's
 * horizontal scroll and start line.
 *
 *   x, y  - pixel position.
 *   color - pixel value; only the low nibble is used.
 *
 * Returns 0 when the pixel was written, 1 when no overlay block could be
 * obtained for the target cell.
 *
 */
unsigned char overlay_putpixel (unsigned char x, unsigned char y, unsigned char color)
{	

#ifdef OVERLAY_WINDOW		//align to scrolled window ?
	x += window_get_scrollX();
	y += window_get_startline();
#endif

	//reftable cell covering the pixel, then the RAM block backing that cell
	unsigned char* cell = &window_get_block(DIV8(x), DIV8(y));
	unsigned char* pixels = _overlay_realloc_block(cell);

	if(pixels)
	{
		_overlay_write_pixel_to_block(pixels, MOD8(x), MOD8(y), color);
		return 0;
	}

	return 1;
}

#endif //OVERLAY_PRIMITIVES








